package com.force.langchain4j.filesource;

import lombok.SneakyThrows;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.springframework.core.annotation.Order;

import java.io.InputStream;
/**
 * File source loader that extracts the plain text of Word documents.
 *
 * <p>Files whose suffix is exactly {@code "docx"} or {@code "doc"} are parsed with
 * Apache POI's {@link XWPFDocument}; every other suffix is delegated to the
 * superclass implementation.
 */
@Order(2)
public class WordFileLoader extends JSONFileLoader implements FileSourceLoader {
	/** Suffix of OOXML Word documents. */
	private static final String DOCX_SUFFIX = "docx";
	/** Suffix of legacy binary Word documents. */
	private static final String DOC_SUFFIX = "doc";

	/**
	 * Returns the text content of the given file.
	 *
	 * @param file the file to read; its suffix decides which loader handles it
	 * @return the extracted document text as a {@link String} for Word files, otherwise
	 *         whatever the superclass returns for this file
	 * @throws java.io.IOException (sneakily, via {@link SneakyThrows}) if the stream cannot
	 *         be read or closed
	 */
	@SneakyThrows
	@Override
	public Object getSourceContent(FileBaseLoader file) {
		String suffix = file.getSuffix();
		// Constant-first equals keeps the check null-safe for a missing suffix.
		if (!DOCX_SUFFIX.equals(suffix) && !DOC_SUFFIX.equals(suffix)) {
			// Not a Word file: do not open a stream here, let the parent loader handle it.
			return super.getSourceContent(file);
		}
		// NOTE(review): XWPFDocument only understands the OOXML (.docx) format. A genuine
		// legacy binary .doc file will be rejected by POI here; supporting it needs the HWPF
		// classes (HWPFDocument / WordExtractor), which this file does not import.
		// Behaviour for "doc" is kept as-is until that dependency decision is made.
		try (InputStream inputStream = file.getInputStream();
				XWPFDocument document = new XWPFDocument(inputStream)) {
			// The extractor only wraps the document; closing the document (and the stream)
			// via try-with-resources releases the underlying resources once the text is read.
			XWPFWordExtractor extractor = new XWPFWordExtractor(document);
			return extractor.getText();
		}
	}
}
